% Mikołaj Czajkowski, Wiktor Budziński
% "Simulation error in maximum likelihood estimation of discrete choice
% models"
%
% Online materials
% 2019-03-08 

% Script calculates regressions and then uses these results to produce Table E1

% "Table E1. Minimum number of Sobol draws required for desired level of log-likelihood and parameter estimates precision (95% confidence intervals in [] brackets)"

% INPUT: Code uses Output matrix from 'MTL_results05_E.mat' which can be generated with MTLcalc_highest.m code

% OUTPUT: Code generates an Excel spreadsheet with Table E1, and also saves
% the regression results as Table E2

% Fix the random seed so that the mvnrnd simulations further down the
% script are reproducible, then start from a clean workspace and console.
rng(10000001);
clear
clc

%% Reading data and setting matrices

% Loads the workspace variables stored in the file; the script uses the
% matrix Output from it.
load('MTL_results05_E.mat')

% Row labels of the regression table (Table E2): one row per regressor,
% followed by the R2 and N rows.
Names = {'Cons.'; 'No. of CT'; 'No. of individuals';  'Log of No. of draws';  ...
     'No. of attributes = 10'; 'Log of No. of draws (*10 Att)'; 'Parameter for std';'SQ';'Cost';  ...
     'R2'; 'N'};

% Table with results: header row + one row per label, one column per model
Table_out = cell(numel(Names)+1,4);
Table_out(1,:) = {' ', 'LL', 'Betas','Z stats'};
Table_out(2:end,1) = Names;

% Common design matrix. Columns (in the order of Names(2:6)):
%   1 - Output(:,43)
%   2 - Output(:,44) divided by 1000
%   3 - log(Output(:,42)) where Output(:,45) == 5, zero otherwise
%   4 - indicator of Output(:,45) == 10
%   5 - log(Output(:,42)) where Output(:,45) == 10, zero otherwise
% NOTE(review): judging by Names, Output columns 42-45 presumably hold the
% no. of draws, no. of CT, no. of individuals and no. of attributes - confirm
% against MTLcalc_highest.m.
XX = [Output(:,43), ...
      Output(:,44)/1000, ...
      log(Output(:,42)).*(Output(:,45) == 5), ...
      Output(:,45) == 10, ...
      log(Output(:,42)).*(Output(:,45) == 10)];

% Containers for the formatted Table E1 entries. They are filled as
% SimLL(k,j,i) and SimB(k,l,j,i) in the sections below:
SimB = cell(2,2,3,3); % (log(0.025), log(0.005) thresholds) x (5, 10 att) x (individuals) x (CT)
SimLL = cell(2,3,3); % (5, 10 att) x (individuals) x (CT)
Exact = 1; % if 0 uses median from simulation, otherwise uses transformation of estimates
%% Regression for LL
% Regresses log(Output(:,1)) on the design matrix XX, writes the formatted
% coefficients to column 2 of Table_out and simulates the minimum number of
% draws that ends up in the LL rows of Table E1 (SimLL).

Y = Output(:,1);
X = XX;

% Drop the rows for which the dependent variable is missing
X = X(~isnan(Y),:);
Y = Y(~isnan(Y),:);
T = zeros(size(X,2)+1,2); % [estimate, std. error]; +1 row for the intercept added by fitlm
P = zeros(size(X,2)+1,1); % p-values

res=fitlm(X, log(Y));

R2 = res.Rsquared.Ordinary;
stdx = sqrt(diag(res.CoefficientCovariance));
COV1 = res.CoefficientCovariance;
VarX =var(table2array(res.Residuals(:,1)));
T(:,1) =table2array(res.Coefficients(:,1));
T(:, 2) =stdx;
pval = pv(T(:,1), stdx); % pv is a helper defined outside this script
P(:, 1) =pval;

% Format each coefficient as 'estimate<stars>(std. error)'
for j =1:(size(X,2)+1)
    if P(j,1) <= 0.01
        Stars = '***     ';
    elseif P(j,1) <= 0.05
        Stars = '**      ';
    elseif P(j,1) <= 0.1
        Stars = '*      ';
    elseif P(j,1) > 0.1
        Stars = '       ';
    else
        continue % p-value is NaN: leave the cell empty
    end
    Table_out(j+1,2) = {[num2str(T(j,1), '%8.4f'), Stars, '(', num2str(T(j,2), '%8.4f'), ')']};
end
Table_out(size(Table_out,1)-1,2) = {num2str(R2, '%1.4f')};
% N is a count, so print it as an integer (it used to be printed with four decimals)
Table_out(size(Table_out,1),2) = {num2str(size(Y,1), '%d')};

% Simulation for Table E1 (LL rows)

% Draw coefficient vectors from the estimated asymptotic distribution.
% Columns of SimX follow the rows of T: 1 - constant, 2 - CT, 3 - individuals,
% 4 - log draws (5 att), 5 - 10 att dummy, 6 - log draws (10 att).
SimX = mvnrnd(T(:,1)', res.CoefficientCovariance, 10000);

% Contributions of CT = 4, 8, 12 (columns 1-3) and of individuals/1000 =
% 0.4, 0.8, 1.2 (columns 4-6); the same for both attribute settings
SimXX = repmat([SimX(:,2)*[4, 8, 12], SimX(:,3)*[0.4, 0.8, 1.2]], [1,1,2]); % CT and NP
SimXXExact = repmat([T(2,1)*[4, 8, 12], T(3,1)*[0.4, 0.8, 1.2]]', [1,2]);

SimXXX = [0*SimX(:,5), SimX(:,5)]; % 5 or 10 att
SimXXXExact = [0, T(5,1)];
SimCost = [SimX(:,4), SimX(:,6)]; % coefficient of log draws for 5 / 10 att
SimCostExact = [T(4,1), T(6,1)];
TmpX = log(chi2inv(0.95,1)/2); % target value of the (log) dependent variable

% Solve the fitted equation for the number of draws at which the predicted
% log dependent variable equals TmpX
for i = 1:3 % CT
    for j = 1:3 % individuals
        for k = 1:2 % 5 or 10 att
            MinDraws = exp((SimX(:,1) + SimXX(:,i,k) + SimXX(:,3+j,k)+ SimXXX(:,k) - TmpX)./(-SimCost(:,k)));
            % The interval always comes from the simulated distribution
            Interval = ['[', num2str(prctile(MinDraws, 2.5), '%8.0f'), '-',num2str(prctile(MinDraws, 97.5), '%8.0f'), ']'];
            if Exact == 0
                Point = median(MinDraws);
            else
                MinDrawsExact = exp((T(1,1)+ SimXXExact(i,k) + SimXXExact(3+j,k)+ SimXXXExact(k) - TmpX)./(-SimCostExact(k)));
                Point = MinDrawsExact;
            end
            SimLL(k,j,i) =  {[num2str(Point, '%8.0f'), '                ', Interval]};
        end
    end
end
% Rows: 5 / 10 att; columns: individuals varying fastest within each CT value
SimLL = reshape(SimLL, [2,9]);
   
%% Regression for Beta
% Stacks the 20 columns Output(:,2:21) into one dependent variable, regresses
% its log on XX plus three dummies (StdX, SQ, Cost), writes the formatted
% coefficients to column 3 of Table_out and simulates the minimum number of
% draws that ends up in the B rows of Table E1 (SimB).

Tmp2 = size(Output,1); % number of rows of Output; also used by the Z-stats section
Y = reshape(Output(:,2:21), [Tmp2*20,1]);
X = XX;
X = repmat(X, [20,1]); % one copy of the design matrix per stacked column

% Dummies marking which of the 20 stacked columns each observation comes
% from. XX(:,4) is the 10-attribute indicator.
% NOTE(review): the column positions presumably correspond to the order of
% the parameters in the estimated models (see the labels 'Parameter for
% std', 'SQ', 'Cost') - confirm against MTLcalc_highest.m.
StdX = zeros(Tmp2,20);
StdX(XX(:,4) == 0,6:10) = 1;
StdX(XX(:,4) == 1,11:20) = 1;
StdX = reshape(StdX, [Tmp2*20,1]);
SQ = zeros(Tmp2,20);
SQ(XX(:,4) == 0,[1 6]) = 1;
SQ(XX(:,4) == 1,[1 11]) = 1;
SQ = reshape(SQ, [Tmp2*20,1]);
Cost = zeros(Tmp2,20);
Cost(XX(:,4) == 0,[5 10]) = 1;
Cost(XX(:,4) == 1,[10 20]) = 1;
Cost = reshape(Cost, [Tmp2*20,1]);

X = [X, [StdX, SQ, Cost]];
% Drop the rows for which the dependent variable is missing
X = X(~isnan(Y),:);
Y = Y(~isnan(Y),:);
T = zeros(size(X,2)+1,2); % [estimate, std. error]; +1 row for the intercept added by fitlm
P = zeros(size(X,2)+1,1); % p-values
res=fitlm(X, log(Y));
R2 = res.Rsquared.Ordinary;
stdx = sqrt(diag(res.CoefficientCovariance));
VarX =var(table2array(res.Residuals(:,1)));
COV2 = res.CoefficientCovariance;
T(:,1) =table2array(res.Coefficients(:,1));
T(:, 2) =stdx;
pval = pv(T(:,1), stdx); % pv is a helper defined outside this script
P(:, 1) =pval;

% Format each coefficient as 'estimate<stars>(std. error)'
for j =1:(size(X,2)+1)
    if P(j,1) <= 0.01
        Stars = '***     ';
    elseif P(j,1) <= 0.05
        Stars = '**      ';
    elseif P(j,1) <= 0.1
        Stars = '*      ';
    elseif P(j,1) > 0.1
        Stars = '       ';
    else
        continue % p-value is NaN: leave the cell empty
    end
    Table_out(j+1,3) = {[num2str(T(j,1), '%8.4f'), Stars, '(', num2str(T(j,2), '%8.4f'), ')']};
end
Table_out(size(Table_out,1)-1,3) = {num2str(R2, '%1.4f')};
% N is a count, so print it as an integer (it used to be printed with four decimals)
Table_out(size(Table_out,1),3) = {num2str(size(Y,1), '%d')};

% Simulation for Table E1 (B rows)

% Draw coefficient vectors from the estimated asymptotic distribution.
% Columns of SimX follow the rows of T: 1 - constant, 2 - CT, 3 - individuals,
% 4 - log draws (5 att), 5 - 10 att dummy, 6 - log draws (10 att),
% 7 - StdX, 8 - SQ, 9 - Cost.
SimX = mvnrnd(T(:,1)', res.CoefficientCovariance, 10000);

% Contributions of CT = 4, 8, 12 (columns 1-3) and of individuals/1000 =
% 0.4, 0.8, 1.2 (columns 4-6); the same for both attribute settings
SimXX = repmat([SimX(:,2)*[4, 8, 12], SimX(:,3)*[0.4, 0.8, 1.2]], [1,1,2]); % CT and NP
SimXXExact = repmat([T(2,1)*[4, 8, 12], T(3,1)*[0.4, 0.8, 1.2]]', [1,2]);

SimXXX = [SimX(:,7)+ SimX(:,8),SimX(:,7)+ SimX(:,8)]; % Only Stdx - SQ
SimXXXExact = [T(7,1)+T(8,1),T(7,1)+T(8,1)];

SimCost = [SimX(:,4), SimX(:,6)]; % coefficient of log draws for 5 / 10 att
SimCostExact = [T(4,1), T(6,1)];

SimXXXX = [0*SimX(:,5), SimX(:,5)]; % 5 or 10 att
SimXXXXExact = [0, T(5,1)];

TmpX = [log(0.025), log(0.005)]; % 5% and 1% (true value is 0.5 - std of random parameter)

% Solve the fitted equation for the number of draws at which the predicted
% log dependent variable equals TmpX(k)
for i = 1:3 % CT
    for j = 1:3 % individuals
        for k = 1:2 % precision threshold
            for l = 1:2 % 5 or 10 att
                MinDraws = exp((SimX(:,1)+ SimXX(:,i,l) + SimXX(:,3+j,l) + SimXXX(:,l) + SimXXXX(:,l)- TmpX(k))./(-SimCost(:,l)));
                % The interval always comes from the simulated distribution
                Interval = ['[', num2str(prctile(MinDraws, 2.5), '%8.0f'), '-',num2str(prctile(MinDraws, 97.5), '%8.0f'), ']'];
                if Exact == 0
                    Point = median(MinDraws);
                else
                    MinDrawsExact = exp((T(1,1) + SimXXExact(i,l) + SimXXExact(3+j,l) + SimXXXXExact(l) +SimXXXExact(l)- TmpX(k))./(-SimCostExact(l)));
                    Point = MinDrawsExact;
                end
                SimB(k,l, j,i) =  {[num2str(Point, '%8.0f'), '                           ', Interval]};
            end
        end
    end
end
% Rows: (threshold 1, 5 att), (threshold 2, 5 att), (threshold 1, 10 att),
% (threshold 2, 10 att); columns: individuals varying fastest within each CT value
SimB = reshape(SimB, [4,9]);


%% Regression for Z-stats
% Stacks the 20 columns Output(:,22:41) into one dependent variable, regresses
% its log on XX plus three dummies (StdX, SQ, Cost) and writes the formatted
% coefficients to column 4 of Table_out.

% Recomputed here so that this section does not depend on the Beta section
% having been run first
Tmp2 = size(Output,1);

Y = reshape(Output(:,22:41), [Tmp2*20,1]);
X = XX;
X = repmat(X, [20,1]); % one copy of the design matrix per stacked column

% Dummies marking which of the 20 stacked columns each observation comes
% from. XX(:,4) is the 10-attribute indicator.
% NOTE(review): the column positions presumably correspond to the order of
% the parameters in the estimated models (see the labels 'Parameter for
% std', 'SQ', 'Cost') - confirm against MTLcalc_highest.m.
StdX = zeros(Tmp2,20);
StdX(XX(:,4) == 0,6:10) = 1;
StdX(XX(:,4) == 1,11:20) = 1;
StdX = reshape(StdX, [Tmp2*20,1]);
SQ = zeros(Tmp2,20);
SQ(XX(:,4) == 0,[1 6]) = 1;
SQ(XX(:,4) == 1,[1 11]) = 1;
SQ = reshape(SQ, [Tmp2*20,1]);
Cost = zeros(Tmp2,20);
Cost(XX(:,4) == 0,[5 10]) = 1;
Cost(XX(:,4) == 1,[10 20]) = 1;
Cost = reshape(Cost, [Tmp2*20,1]);

X = [X, [StdX, SQ, Cost]];
% Drop the rows for which the dependent variable is missing
X = X(~isnan(Y),:);
Y = Y(~isnan(Y),:);

T = zeros(size(X,2)+1,2); % [estimate, std. error]; +1 row for the intercept added by fitlm
P = zeros(size(X,2)+1,1); % p-values

res=fitlm(X, log(Y));

R2 = res.Rsquared.Ordinary;
stdx = sqrt(diag(res.CoefficientCovariance));
COV3 = res.CoefficientCovariance;
VarX =var(table2array(res.Residuals(:,1)));
T(:,1) =table2array(res.Coefficients(:,1));
T(:, 2) =stdx;
pval = pv(T(:,1), stdx); % pv is a helper defined outside this script
P(:, 1) =pval;

% Format each coefficient as 'estimate<stars>(std. error)'
for j =1:(size(X,2)+1)
    if P(j,1) <= 0.01
        Stars = '***     ';
    elseif P(j,1) <= 0.05
        Stars = '**      ';
    elseif P(j,1) <= 0.1
        Stars = '*      ';
    elseif P(j,1) > 0.1
        Stars = '       ';
    else
        continue % p-value is NaN: leave the cell empty
    end
    Table_out(j+1,4) = {[num2str(T(j,1), '%8.4f'), Stars, '(', num2str(T(j,2), '%8.4f'), ')']};
end

Table_out(size(Table_out,1)-1,4) = {num2str(R2, '%1.4f')};
% N is a count, so print it as an integer (it used to be printed with four decimals)
Table_out(size(Table_out,1),4) = {num2str(size(Y,1), '%d')};

% Write the regression table (Table E2) and the minimum-draws table
% (Table E1) to separate sheets of the same Excel file.
Name = 'Results20190308.xls';
xlswrite(Name, Table_out, 'Table E2', 'A1');

% Two header rows: row 1 - no. of CT, row 2 - no. of individuals
Head = {'4', '4', '4', '8', '8', '8', '12', '12', '12'; '400' , '800', '1200', '400' , '800', '1200','400' , '800', '1200'};
xlswrite(Name, Head, 'Table E1', 'B1');

% Head occupies rows 1-2, so the results start in row 3. Writing them from
% row 2 would overwrite the second header row.
xlswrite(Name, SimLL(1,:), 'Table E1', 'B3'); % LL, 5 attributes
xlswrite(Name, SimB(1,:), 'Table E1', 'B4');  % B (first threshold), 5 attributes

xlswrite(Name, SimLL(2,:), 'Table E1', 'B6'); % LL, 10 attributes
xlswrite(Name, SimB(3,:), 'Table E1', 'B7');  % B (first threshold), 10 attributes

% Labels for the two header rows and the result rows (row 5 is a spacer)
RowName = {'No. of CT'; 'No. of individuals'; 'LL: 5 attributes'; 'B: 5 attributes'; ' '; 'LL: 10 attributes'; 'B: 10 attributes'};
xlswrite(Name, RowName, 'Table E1', 'A1');
